With Wan-hsuan, starting from 2023 Oct
C:_R_from2019\1st_2nd_PCR_v106_hts_tube_jp_orderform20191204.xls Eurofins (515F, 806R) ### [1.5] Miseq sequence (Run ID: RMR152)
library(vegan)
Loading required package: permute
Loading required package: lattice
This is vegan 2.6-4
library(ggeffects)
library(ggplot2)
library(parallel)
set.seed(1234567)
sample_info <- read.csv("SampleSheet_RMR152.csv", skip = 19, header = T)
sample_info
# Load the direct microscopic counts of bacteria (10 replicate fields per sample)
raw_bac_count <- read.csv("bacterial_count_sumitomo.csv", header = TRUE)
raw_bac_count
# Average the ten replicate counts (columns 3-12) for each sample;
# rowMeans() is the vectorized idiom for apply(x, 1, mean) and keeps
# identical values for purely numeric columns
average <- rowMeans(raw_bac_count[3:12])
# Convert the averaged raw count to cell density (cells/mL) using
# microscope/filtration conversion factors
A <- 210            # effective filtered area (mm^2)
G <- 77*77*1.0e-6   # area of one field of view (mm^2)
V <- 1.0            # volume of filtered sample (mL)
cell_density <- average*(A/G)/V
# Collect month, site, averaged count and density into one data frame
bac_cell_density <- data.frame(month = raw_bac_count$month, location = raw_bac_count$river, average_count = average, density = cell_density)
bac_cell_density
# Cache for downstream scripts
saveRDS(bac_cell_density, "bac_cell_density.obj")
MF_P <- readRDS("./Jan2024v3/MF_list/MF_integ14_ave_matrix_raw.obj")
head(MF_P)
Sample_1 Sample_2 Sample_3 Sample_4 Sample_5 Sample_6 Sample_7 Sample_8 Sample_9
treshold_0 21 29 30 30 30 15 31 30 30
treshold_0.05 18 29 29 30 30 10 31 30 30
treshold_0.1 17 29 29 30 29 8 31 30 30
treshold_0.15 16 29 28 30 29 5 31 28 30
treshold_0.2 16 29 28 29 29 4 29 28 30
treshold_0.25 13 29 27 29 29 2 28 27 30
Sample_10 Sample_11 Sample_12 Sample_13 Sample_14 Sample_15 Sample_16 Sample_17
treshold_0 28 22 30 31 30 29 30 30
treshold_0.05 28 13 30 30 29 27 30 30
treshold_0.1 26 10 30 30 29 27 30 30
treshold_0.15 26 5 30 30 28 26 30 30
treshold_0.2 26 3 30 29 28 21 28 30
treshold_0.25 24 2 30 29 28 17 25 30
Sample_18 Sample_19 Sample_20
treshold_0 28 30 30
treshold_0.05 28 30 30
treshold_0.1 28 30 30
treshold_0.15 28 30 30
treshold_0.2 28 30 30
treshold_0.25 28 30 30
List of ASV composition before coverage-based standardization, with ASVs from mitochondria and chloroplasts excluded
raw_prokaryote_ASVcomposition_info <- as.data.frame( readRDS("./Jan2024v3/ASV_composition/table_ASV_no-mitochondria-no-chloroplas_biwako_dada2_Jan2024.obj"))
#The number of ASVs that were detected in two negative samples
apply(raw_prokaryote_ASVcomposition_info > 0, 1, sum)[21:22]
s021_prokaryote.forward.fastq s022_prokaryote.forward.fastq
12 7
#The total reads that were detected in two negative samples
apply(raw_prokaryote_ASVcomposition_info, 1, sum)[21:22]
s021_prokaryote.forward.fastq s022_prokaryote.forward.fastq
29 16
List of sample (data set) info, bacterial abundance, sampling coverage (cov_exclu), #reads (seq_exclu), #observed ASVs (rich_exclu)
attributes_sample <- readRDS("./Jan2024v3/attri_exclu_undeter.eukar_Jan2024.obj")
attributes_sample$sample_ID <- c("S01", "S02", "S03", "S04", "S05", "S06", "S07", "S08", "S09", "S10", "S11", "S12", "S13", "S14", "S15", "S16", "S17", "S18", "S19", "S20")
attributes_sample[, -c(4,5)]
ASV table being rarefied with the coverage = 0.9232 and then ASVs with 1) poor alignment to PiCRUST2 or 2) with 0 abundance being deleted, noting that the data sets with low coverages were also excluded and 13 data sets remained.
remaining_ID <- c("S01", "S02", "S03", "S04", "S05", "S06", "S07", "S08", "S11", "S17", "S18", "S19", "S20")
stand_ASV_com <- readRDS("./Jan2024v3/ASV_composition/Com_rarefied_exclu_undeter.eukar_AfterNeg_filtered_order_del0ASV_Jan2024.obj")
class(stand_ASV_com)
[1] "data.frame"
colnames(stand_ASV_com)
[1] "Sample_1" "Sample_2" "Sample_3" "Sample_4" "Sample_5" "Sample_6" "Sample_7"
[8] "Sample_8" "Sample_11" "Sample_17" "Sample_18" "Sample_19" "Sample_20"
colnames(stand_ASV_com) <- remaining_ID
head(stand_ASV_com[, 2:5])
KO_com <- readRDS("./Jan2024v3/KO_composition/KO_exclu_undeter.eukar_AfterNeg_filtered_order_del0ASV_Jan2024.obj")
class(KO_com)
[1] "data.frame"
head(KO_com)
Binarization of ASV composition with the threshold 0.0 for ASVs and 1 for KOs
# Work on copies so the original abundance tables stay intact
stand_ASV_com_binarized <- stand_ASV_com
KO_com_binarized <- KO_com
# Binarize to presence/absence: ASVs use threshold > 0, KOs use >= 1
stand_ASV_com_binarized[stand_ASV_com_binarized > 0] <- 1.0
# NOTE(review): after the assignment above every value is already 0 or 1,
# so this "< 0" line is a no-op (read counts are never negative). Kept
# byte-identical; confirm whether a different threshold was intended.
stand_ASV_com_binarized[stand_ASV_com_binarized < 0] <- 0.0
KO_com_binarized[KO_com_binarized >= 1.0] <- 1.0
KO_com_binarized[KO_com_binarized < 1.0] <- 0.0
ASV and KO richness for the whole data set
ASV_gamma_richness <- sum(apply(stand_ASV_com_binarized, 1, sum) > 0)
KO_gamma_richness <- sum(apply(KO_com_binarized, 2, sum) > 0)
ASV_gamma_richness
[1] 3576
KO_gamma_richness
[1] 7340
ASV and KO richness for each data set
# ASV richness per data set: column sums of the presence/absence table
stand_ASV_richness <- apply(stand_ASV_com_binarized, 2, sum)
# KO richness per data set: a KO counts as present when at least one
# present ASV carries it; t(KO table) %*% ASV column gives per-KO
# carrier counts. seq_len(ncol(...)) replaces the hard-coded 13 so the
# code keeps working if the number of retained data sets changes, and
# the transpose is hoisted out of the loop (it is loop-invariant).
KO_t <- t(KO_com_binarized)
KO_richness <- numeric(ncol(stand_ASV_com_binarized))
for(j in seq_len(ncol(stand_ASV_com_binarized))) {
#Obtain the KO composition for each data set
KO_list <- KO_t %*% stand_ASV_com_binarized[, j]
#Calculate the number of KOs
KO_richness[j] <- sum(KO_list > 0)
}
MF_P_filtered0.05 <- MF_P[2, c(1,2,3,4,5,6,7,8,11,17,18,19,20)] #subsets of samples that are remaining for further bioinformatic analyses
plot(stand_ASV_richness, KO_richness)
plot(stand_ASV_richness, MF_P_filtered0.05)
Summarizing basic info into a dataframe
richness_temp <- data.frame(sample_ID = remaining_ID, stand_ASV_richness, KO_richness)
data_summary <- merge(attributes_sample[, -c(4,5)], richness_temp, by = "sample_ID", all = T)
colnames(data_summary)[4:7] <- c("bacterial abundance", "coverage", "no_reads", "obs_ASV_richness")
data_summary <- data.frame(data_summary, MF_P0.05 = MF_P[2, ])
data_summary
richness_MF <- data.frame(ASV_richness = stand_ASV_richness, MF_G = KO_richness, MF_P0.05 = MF_P_filtered0.05, location = data_summary$location[c(1,2,3,4,5,6,7,8,11,17,18,19,20)], date = data_summary$date_sample[c(1,2,3,4,5,6,7,8,11,17,18,19,20)])
richness_MF
plot(richness_MF)
lm_model <- lm(MF_P0.05 ~ MF_G, data = richness_MF)
summary(lm_model)
Call:
lm(formula = MF_P0.05 ~ MF_G, data = richness_MF)
Residuals:
Min 1Q Median 3Q Max
-9.4562 -0.9701 0.9245 1.3430 4.7861
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -39.208828 9.942854 -3.943 0.0023 **
MF_G 0.009584 0.001455 6.588 3.93e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.408 on 11 degrees of freedom
Multiple R-squared: 0.7978, Adjusted R-squared: 0.7794
F-statistic: 43.4 on 1 and 11 DF, p-value: 3.926e-05
plot(MF_P0.05 ~ MF_G, data = richness_MF)
abline(lm_model)
#glm and stepwise model selection
summary(step(glm(MF_P0.05 ~ MF_G + ASV_richness, data = richness_MF, family = poisson(link = "log"))))
Start: AIC=77.07
MF_P0.05 ~ MF_G + ASV_richness
Df Deviance AIC
- ASV_richness 1 5.5179 75.171
<none> 5.4150 77.068
- MF_G 1 10.0893 79.742
Step: AIC=75.17
MF_P0.05 ~ MF_G
Df Deviance AIC
<none> 5.5179 75.171
- MF_G 1 28.8685 96.522
Call:
glm(formula = MF_P0.05 ~ MF_G, family = poisson(link = "log"),
data = richness_MF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.00659 -0.01277 0.07949 0.26200 0.93343
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.1612495 0.8031090 -0.201 0.841
MF_G 0.0004963 0.0001151 4.311 1.63e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 28.8685 on 12 degrees of freedom
Residual deviance: 5.5179 on 11 degrees of freedom
AIC: 75.171
Number of Fisher Scoring iterations: 4
#best model from the stepwise selection above: Poisson GLM MF_P0.05 ~ MF_G
z <- glm(MF_P0.05 ~ MF_G, data = richness_MF, family = poisson(link = "log"))
y_max <- 35  # upper bound of the y axis (ribbon is clipped to this)
# Evenly spaced MF_G grid for drawing the fitted curve (13 points = #samples)
richness_MF$mf_g = seq(4500, 7300, length.out = 13)
# Fitted mean on the response scale: exp(b0 + b1 * MF_G)
richness_MF$predicted = exp(z$coefficients[1] + z$coefficients[2] * richness_MF$mf_g)
# Rough +/- 2*sqrt(mean) envelope assuming Poisson-like variance.
# NOTE(review): this is an ad-hoc band, not a model-based interval —
# confirm it is intended instead of predict(z, se.fit = TRUE).
richness_MF$Low = richness_MF$predicted - 2 * sqrt(mean(richness_MF$MF_P0.05))
richness_MF$Low[richness_MF$Low < 0] <- 0  # richness cannot be negative
richness_MF$High = richness_MF$predicted + 2 * sqrt(mean(richness_MF$MF_P0.05))
richness_MF$High[richness_MF$High > y_max] <- y_max  # clip to plot range
# Observed points by site, plus fitted line and envelope.
# NOTE(review): richness_MF$location inside aes() works here but the
# idiomatic form is aes(..., shape = location).
linear_plot <- ggplot(data = richness_MF) + geom_point(aes(x = MF_G, y = MF_P0.05, shape = richness_MF$location), size = 3, alpha = 0.9) + labs(shape = "sites") + scale_shape_manual(values = c(0, 1, 2, 5, 6))
linear_plot <- linear_plot + geom_line(aes(x = mf_g, y = predicted))
linear_plot <- linear_plot + geom_ribbon(aes(x = mf_g, ymin = Low, ymax = High), alpha = 0.1)
linear_plot <- linear_plot + xlab("MF_G") + ylab("MF_P") + ylim(0, y_max) + theme_bw()
plot(linear_plot)
Save it as the pdf file
pdf("figure01.pdf", width = 10, height = 7, onefile = FALSE)
plot(linear_plot)
# Close the PDF device
dev.off()
null device
1
#glm and stepwise model selection
summary(step(glm(MF_P0.05 ~ MF_G + ASV_richness, data = richness_MF[-9, ], family = poisson(link = "log"))))
Start: AIC=70.23
MF_P0.05 ~ MF_G + ASV_richness
Df Deviance AIC
- ASV_richness 1 3.3491 68.587
<none> 2.9914 70.229
- MF_G 1 9.3325 74.570
Step: AIC=68.59
MF_P0.05 ~ MF_G
Df Deviance AIC
<none> 3.3491 68.587
- MF_G 1 20.3561 83.594
Call:
glm(formula = MF_P0.05 ~ MF_G, family = poisson(link = "log"),
data = richness_MF[-9, ])
Deviance Residuals:
Min 1Q Median 3Q Max
-1.27975 -0.21120 0.09244 0.27598 0.88318
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.6039451 1.3093124 -1.225 0.220564
MF_G 0.0007002 0.0001858 3.768 0.000165 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 20.3561 on 11 degrees of freedom
Residual deviance: 3.3491 on 10 degrees of freedom
AIC: 68.587
Number of Fisher Scoring iterations: 4
Ref: COLWELL et al. Ecology, 85(10), 2004, pp. 2717-2727
Q: How many species have each KO?
A: Inner product of the transposed KO table and ASV vector of the specific sample returns the vector regarding how many species have each KO
Note: n! = gamma(n+1) but it cannot be directly used
# Probability that a KO carried by exactly j of the H ASVs is absent from
# a random subsample of h surviving ASVs, i.e. choose(H - j, h) / choose(H, h)
# (Colwell et al. 2004). Computed as a running product because the factorial
# form (n! = gamma(n + 1)) overflows for realistic H.
#
# Args:
#   j - number of ASVs carrying the KO (1 <= j <= H)
#   h - number of ASVs retained in the subsample (0 <= h <= H)
#   H - total observed ASV richness
# Returns a probability in [0, 1].
alpha_jh <- function(j, h, H) {
  if (j + h > H) {
    # Not enough KO-free ASVs to fill the subsample: the KO is surely present
    return(0.0)
  }
  if (h == 0) {
    # Empty product. The original looped over 0:(h - 1) = c(0, -1) here,
    # which silently produced a wrong value for h == 0.
    return(1.0)
  }
  temp <- 1.0
  for (k in 0:(h - 1)) {
    temp <- temp * (H - j - k) / (H - k)
  }
  temp
}
# Expected KO richness remaining when only h ASVs survive out of H, under
# random extinction (Colwell et al. 2004):
#   tau(h) = S_obs - sum_j alpha_jh(j, h, H) * s[j]
# where s[j] is the number of KOs carried by exactly j ASVs.
#
# Args:
#   h     - number of surviving ASVs
#   S_obs - observed KO richness of the full sample
#   H     - observed ASV richness of the full sample
#   s     - tabulated carrier counts; indices beyond length(s) are treated
#           as zero. (The original indexed s[j] for j up to H, which yields
#           NA in R whenever no KO is carried by all H ASVs and silently
#           turns the whole result into NA.)
tau_chir <- function(h, S_obs, H, s){
  temp <- S_obs
  for (j in seq_len(H)) {
    s_j <- if (j <= length(s)) s[j] else 0
    temp <- temp - alpha_jh(j, h, H) * s_j
  }
  return(temp)
}
#Example for a single sample
KO_dist <- t(KO_com_binarized) %*% stand_ASV_com_binarized$S02
hist(KO_dist)
Index for the interpolation function
s <- vector() #number of KOs with j time appearance
for(j in 1:max(KO_dist)) {
s[j] <- sum(KO_dist == j)
}
Trial of interpolation (random extinction)
tau <- vector()
ASV_richness_02 <- data_summary$stand_ASV_richness[2]
KO_richness_02 <- data_summary$KO_richness[2]
for(x in 1:ASV_richness_02) tau[x] <- tau_chir(x, KO_richness_02, ASV_richness_02, s)
plot(1:ASV_richness_02, tau)
plot(log10(1:ASV_richness_02), log10(tau))
Fitting power-law regressions with different ranges and intervals
x <- 1:ASV_richness_02
#full range of ASV richness and the fine interval
summary(fit_full <- lm(formula = log10(tau) ~ log10(x)))
Call:
lm(formula = log10(tau) ~ log10(x))
Residuals:
Min 1Q Median 3Q Max
-0.38874 -0.00900 0.00023 0.01135 0.01975
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.4551158 0.0025479 1356.1 <2e-16 ***
log10(x) 0.1230978 0.0008476 145.2 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.01849 on 2558 degrees of freedom
Multiple R-squared: 0.8918, Adjusted R-squared: 0.8918
F-statistic: 2.109e+04 on 1 and 2558 DF, p-value: < 2.2e-16
#For the coarse interval (every 5 %)
s <- ASV_richness_02/20
no_ASV <- vector()
no_ASV[1] <- 1
for(k in 2:20) no_ASV[k] <- round(s*(k - 1))
no_ASV[21] <- ASV_richness_02
#full range and coarse interval
summary(fit_full_c <- lm(formula = log10(tau[no_ASV]) ~ log10(x[no_ASV])))
Call:
lm(formula = log10(tau[no_ASV]) ~ log10(x[no_ASV]))
Residuals:
Min 1Q Median 3Q Max
-0.097240 -0.033375 -0.008542 0.028329 0.102984
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.16362 0.04445 71.18 < 2e-16 ***
log10(x[no_ASV]) 0.21822 0.01495 14.60 8.9e-12 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.04979 on 19 degrees of freedom
Multiple R-squared: 0.9181, Adjusted R-squared: 0.9138
F-statistic: 213 on 1 and 19 DF, p-value: 8.901e-12
#0-50% reduction range and the fine interval
summary(fit_50 <- lm(formula = log10(tau[no_ASV[11]:ASV_richness_02]) ~ log10(x[no_ASV[11]:ASV_richness_02])))
Call:
lm(formula = log10(tau[no_ASV[11]:ASV_richness_02]) ~ log10(x[no_ASV[11]:ASV_richness_02]))
Residuals:
Min 1Q Median 3Q Max
-5.130e-04 -1.526e-04 5.609e-05 1.821e-04 2.243e-04
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.642e+00 2.145e-04 16981.8 <2e-16 ***
log10(x[no_ASV[11]:ASV_richness_02]) 6.351e-02 6.547e-05 970.1 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.0002014 on 1279 degrees of freedom
Multiple R-squared: 0.9986, Adjusted R-squared: 0.9986
F-statistic: 9.41e+05 on 1 and 1279 DF, p-value: < 2.2e-16
#0-50% reduction range and the coarse interval
summary(fit_50_c <- lm(formula = log10(tau[no_ASV[11:21]]) ~ log10(no_ASV[11:21])))
Call:
lm(formula = log10(tau[no_ASV[11:21]]) ~ log10(no_ASV[11:21]))
Residuals:
Min 1Q Median 3Q Max
-0.0004339 -0.0001518 0.0001014 0.0002110 0.0002695
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.6416629 0.0027765 1311.60 < 2e-16 ***
log10(no_ASV[11:21]) 0.0637098 0.0008479 75.14 6.63e-14 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.0002659 on 9 degrees of freedom
Multiple R-squared: 0.9984, Adjusted R-squared: 0.9982
F-statistic: 5646 on 1 and 9 DF, p-value: 6.626e-14
#0-90% reduction range and the fine interval
summary(fit_10 <- lm(formula = log10(tau[no_ASV[3]:ASV_richness_02]) ~ log10(x[no_ASV[3]:ASV_richness_02])))
Call:
lm(formula = log10(tau[no_ASV[3]:ASV_richness_02]) ~ log10(x[no_ASV[3]:ASV_richness_02]))
Residuals:
Min 1Q Median 3Q Max
-0.0095432 -0.0014807 0.0004911 0.0019216 0.0024601
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.5855191 0.0005641 6355.8 <2e-16 ***
log10(x[no_ASV[3]:ASV_richness_02]) 0.0810340 0.0001822 444.7 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.002235 on 2303 degrees of freedom
Multiple R-squared: 0.9885, Adjusted R-squared: 0.9885
F-statistic: 1.977e+05 on 1 and 2303 DF, p-value: < 2.2e-16
#0-90% reduction range and the coarse interval
summary(fit_10_c <- lm(formula = log10(tau[no_ASV[3:21]]) ~ log10(no_ASV[3:21])))
Call:
lm(formula = log10(tau[no_ASV[3:21]]) ~ log10(no_ASV[3:21]))
Residuals:
Min 1Q Median 3Q Max
-0.0076597 -0.0015030 0.0006151 0.0023998 0.0031114
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.578215 0.007448 480.46 <2e-16 ***
log10(no_ASV[3:21]) 0.083285 0.002412 34.53 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.002932 on 17 degrees of freedom
Multiple R-squared: 0.9859, Adjusted R-squared: 0.9851
F-statistic: 1192 on 1 and 17 DF, p-value: < 2.2e-16
plot(log10(tau) ~ log10(1:ASV_richness_02),
xlim = c(0, log10(ASV_richness_02)),
ylim = c(2, max(log10(tau))),
xlab = "Log10(ASV richness)",
ylab = "Log10(KO richness)",
main = "Ane river 20190709"
)
abline(fit_full, col = 2, lwd = 2)
abline(fit_10, col = 3, lwd = 2)
abline(fit_50, col = 4, lwd = 2)
abline(fit_full_c, col = 1, lty = "dashed")
abline(fit_10_c, col = 1, lty = "dashed")
abline(fit_50_c, col = 1, lty = "dashed")
abline(v = log10(no_ASV[3]), col = 3, lty = "dashed", lwd = 2)
abline(v = log10(no_ASV[11]), col = 4, lty = "dashed", lwd = 2)
# Back-transform of the log10-log10 regression: tau is approximated by
# c * x^a with c = 10^intercept and a = slope, both read from the global
# `fit_full` in the enclosing environment (as in the original).
fitted_model <- function(x) {
  coefs <- fit_full$coefficients
  (10^coefs[1]) * x^coefs[2]
}
plot(1:ASV_richness_02, tau, xlim = c(1, ASV_richness_02), ylim = c(0, max(tau)))
par(new = T)
plot(1:ASV_richness_02, fitted_model(x), xlim = c(1, ASV_richness_02), ylim = c(0, max(tau)), xlab = "", ylab = "", col = 4, type = "l")
Definition of function, which has the binarized ASV composition and the binarized KO table as parameters
sample_ID <- c(1, 2, 3, 4, 5, 6, 7, 8, 11, 17, 18, 19, 20) #only a part of samples is used
# Analytical random-extinction analysis (Colwell et al. 2004 rarefaction).
# For each sample (column) of `com_binarized` (ASV x sample presence/absence
# table) it computes the expected KO richness tau(h) after random loss of
# ASVs, fits power laws log10(tau) ~ log10(#ASVs) over several ranges and
# intervals, draws diagnostic plots (and a PDF for the first sample), and
# returns a one-row-per-sample summary data frame.
# Naming scheme: "_c" = coarse interval (every 5 % of ASV richness);
# "_50" / "_10" = fits restricted to >= 50 % / >= 10 % surviving ASVs.
# NOTE(review): relies on the globals `tau_chir`, `alpha_jh`, `sample_info`
# and `sample_ID` defined earlier in this file.
random_extinction <- function(com_binarized, KO_binarized) {
ASV_richness <- vector()
KO_richness <- vector()
tau_one <- vector() #expected KO richness with a single ASV
slope_full <- vector() #slope of fitted power law
slope_full_c <- vector() #slope of fitted power law
slope_50 <- vector() #slope of fitted power law
slope_50_c <- vector() #slope of fitted power law
slope_10 <- vector() #slope of fitted power law
slope_10_c <- vector() #slope of fitted power law
slope_full_up <- vector() #upper limit of 95% confidence interval
slope_full_down <- vector() #lower limit of 95% confidence interval
slope_full_c_up <- vector() #upper limit of 95% confidence interval
slope_full_c_down <- vector() #lower limit of 95% confidence interval
slope_50_up <- vector() #upper limit of 95% confidence interval
slope_50_down <- vector() #lower limit of 95% confidence interval
slope_50_c_up <- vector() #upper limit of 95% confidence interval
slope_50_c_down <- vector() #lower limit of 95% confidence interval
slope_10_up <- vector() #upper limit of 95% confidence interval
slope_10_down <- vector() #lower limit of 95% confidence interval
slope_10_c_up <- vector() #upper limit of 95% confidence interval
slope_10_c_down <- vector() #lower limit of 95% confidence interval
intercept_full <- vector() #intercept of fitted power law
intercept_full_c <- vector() #intercept of fitted power law
intercept_50 <- vector() #intercept of fitted power law
intercept_50_c <- vector() #intercept of fitted power law
intercept_10 <- vector() #intercept of fitted power law
intercept_10_c <- vector() #intercept of fitted power law
R2fitted_full <- vector() #adjusted R-squared value of fitted power law
R2fitted_full_c <- vector() #adjusted R-squared value of fitted power law
R2fitted_50 <- vector() #adjusted R-squared value of fitted power law
R2fitted_50_c <- vector() #adjusted R-squared value of fitted power law
R2fitted_10 <- vector() #adjusted R-squared value of fitted power law
R2fitted_10_c <- vector() #adjusted R-squared value of fitted power law
no_ASV <- vector() #ASV richness used with the coarse interval
#Main loop over samples (columns of the binarized ASV table)
for(sample_no in 1:length(com_binarized[1,])){
#Number of ASVs carrying each KO (inner product with the sample's ASV vector)
KO_dist <- t(KO_binarized) %*% com_binarized[, sample_no]
s <- vector() #s[j] = number of KOs carried by exactly j ASVs
for(j in 1:max(KO_dist)) {
s[j] <- sum(KO_dist == j)
}
#ASV richness and KO richness of this sample
ASV_richness[sample_no] <- sum(com_binarized[, sample_no] > 0)
KO_richness[sample_no] <- sum(KO_dist > 0)
#Setting for the coarse interval (21 levels: 1 ASV, 5%, 10%, ..., 100%)
interval <- ASV_richness[sample_no]/20 #every 5% changes in ASV richness
no_ASV[1] <- 1
for(k in 2:20) no_ASV[k] <- round(interval*(k - 1))
no_ASV[21] <- ASV_richness[sample_no]
#interpolation: expected KO richness tau(x) for every survivor count x
tau <- vector()
for(x in 1:ASV_richness[sample_no]) tau[x] <- tau_chir(x, KO_richness[sample_no], ASV_richness[sample_no], s)
#Setting for fitting power law
asv <- 1:ASV_richness[sample_no]
#full range of ASV richness and the fine interval
fit_full <- lm(formula = log10(tau) ~ log10(asv))
#full range and coarse interval
fit_full_c <- lm(formula = log10(tau[no_ASV]) ~ log10(asv[no_ASV]))
#0-50% reduction range and the fine interval
fit_50 <- lm(formula = log10(tau[no_ASV[11]:ASV_richness[sample_no]]) ~ log10(asv[no_ASV[11]:ASV_richness[sample_no]]))
#0-50% reduction range and the coarse interval
fit_50_c <- lm(formula = log10(tau[no_ASV[11:21]]) ~ log10(no_ASV[11:21]))
#0-90% reduction range and the fine interval
fit_10 <- lm(formula = log10(tau[no_ASV[3]:ASV_richness[sample_no]]) ~ log10(asv[no_ASV[3]:ASV_richness[sample_no]]))
#0-90% reduction range and the coarse interval
fit_10_c <- lm(formula = log10(tau[no_ASV[3:21]]) ~ log10(no_ASV[3:21]))
#Data summary: slopes, their 95% CIs (rows of confint.lm: [2,] = slope),
#back-transformed intercepts (10^) and adjusted R-squared per fit
tau_one[sample_no] <- tau[1]
slope_full[sample_no] <- fit_full$coefficients[2]
slope_full_down[sample_no] <- confint.lm(fit_full)[2, 1]
slope_full_up[sample_no] <- confint.lm(fit_full)[2, 2]
intercept_full[sample_no] <- 10^fit_full$coefficients[1]
R2fitted_full[sample_no] <- summary(fit_full)$adj.r.squared
slope_full_c[sample_no] <- fit_full_c$coefficients[2]
slope_full_c_down[sample_no] <- confint.lm(fit_full_c)[2, 1]
slope_full_c_up[sample_no] <- confint.lm(fit_full_c)[2, 2]
intercept_full_c[sample_no] <- 10^fit_full_c$coefficients[1]
R2fitted_full_c[sample_no] <- summary(fit_full_c)$adj.r.squared
slope_50[sample_no] <- fit_50$coefficients[2]
slope_50_down[sample_no] <- confint.lm(fit_50)[2, 1]
slope_50_up[sample_no] <- confint.lm(fit_50)[2, 2]
intercept_50[sample_no] <- 10^fit_50$coefficients[1]
R2fitted_50[sample_no] <- summary(fit_50)$adj.r.squared
slope_50_c[sample_no] <- fit_50_c$coefficients[2]
slope_50_c_down[sample_no] <- confint.lm(fit_50_c)[2, 1]
slope_50_c_up[sample_no] <- confint.lm(fit_50_c)[2, 2]
intercept_50_c[sample_no] <- 10^fit_50_c$coefficients[1]
R2fitted_50_c[sample_no] <- summary(fit_50_c)$adj.r.squared
slope_10[sample_no] <- fit_10$coefficients[2]
slope_10_down[sample_no] <- confint.lm(fit_10)[2, 1]
slope_10_up[sample_no] <- confint.lm(fit_10)[2, 2]
intercept_10[sample_no] <- 10^fit_10$coefficients[1]
R2fitted_10[sample_no] <- summary(fit_10)$adj.r.squared
slope_10_c[sample_no] <- fit_10_c$coefficients[2]
slope_10_c_down[sample_no] <- confint.lm(fit_10_c)[2, 1]
slope_10_c_up[sample_no] <- confint.lm(fit_10_c)[2, 2]
intercept_10_c[sample_no] <- 10^fit_10_c$coefficients[1]
R2fitted_10_c[sample_no] <- summary(fit_10_c)$adj.r.squared
#log10-log10 scale diagnostic plot: points plus all six fitted lines;
#vertical dashed lines mark the 10% and 50% survival thresholds
plot(log10(asv), log10(tau), main = sample_info$Description[sample_ID[sample_no]], xlim = c(0, log10(ASV_richness[sample_no])), ylim = c(2.5, log10(max(tau))))
abline(fit_full, col = 2, lwd = 2)
abline(fit_10, col = 3, lwd = 2)
abline(fit_50, col = 4, lwd = 2)
abline(fit_full_c, col = 1, lty = "dashed")
abline(fit_10_c, col = 1, lty = "dashed")
abline(fit_50_c, col = 1, lty = "dashed")
abline(v = log10(no_ASV[3]), col = 3, lty = "dashed", lwd = 2)
abline(v = log10(no_ASV[11]), col = 4, lty = "dashed", lwd = 2)
#Replicate the first sample's plot into figure02a.pdf
if(sample_no == 1) {
pdf("figure02a.pdf", width = 10, height = 7, onefile = FALSE)
plot(log10(asv), log10(tau), main = sample_info$Description[sample_ID[sample_no]], xlim = c(0, log10(ASV_richness[sample_no])), ylim = c(2.5, log10(max(tau))))
abline(fit_full, col = 2, lwd = 2)
abline(fit_10, col = 3, lwd = 2)
abline(fit_50, col = 4, lwd = 2)
abline(fit_full_c, col = 1, lty = "dashed")
abline(fit_10_c, col = 1, lty = "dashed")
abline(fit_50_c, col = 1, lty = "dashed")
abline(v = log10(no_ASV[3]), col = 3, lty = "dashed", lwd = 2)
abline(v = log10(no_ASV[11]), col = 4, lty = "dashed", lwd = 2)
# Close the PDF device
dev.off()
}
}
#Summarizing results into dataframe (one row per sample)
summary_random_ext <- data.frame(
sample = sample_info$Description[sample_ID], ASV_richness, KO_richness,
tau_one,
intercept_full, intercept_full_c, intercept_50, intercept_50_c, intercept_10, intercept_10_c,
slope_full, slope_full_c, slope_50, slope_50_c, slope_10, slope_10_c,
slope_full_down, slope_full_c_down, slope_50_down, slope_50_c_down, slope_10_down, slope_10_c_down,
slope_full_up, slope_full_c_up, slope_50_up, slope_50_c_up, slope_10_up, slope_10_c_up,
R2fitted_full, R2fitted_full_c, R2fitted_50, R2fitted_50_c, R2fitted_10, R2fitted_10_c
)
return(summary_random_ext)
}
Using the ASV table without converting by bacterial direct count
random_analytical <- random_extinction(stand_ASV_com_binarized, KO_com_binarized)
random_analytical
Variations between samples (Figure 2b)
#plot(random_analytical[, ])
asv_redundancy <- lm(slope_10_c ~ ASV_richness, data = random_analytical)
summary(asv_redundancy)
Call:
lm(formula = slope_10_c ~ ASV_richness, data = random_analytical)
Residuals:
Min 1Q Median 3Q Max
-0.011978 -0.006656 -0.002459 0.006434 0.015596
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.551e-01 6.026e-03 25.742 3.51e-11 ***
ASV_richness -3.071e-05 3.200e-06 -9.596 1.12e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.008722 on 11 degrees of freedom
Multiple R-squared: 0.8933, Adjusted R-squared: 0.8836
F-statistic: 92.08 on 1 and 11 DF, p-value: 1.115e-06
plot(
slope_10_c ~ ASV_richness, data = random_analytical,
pch = c(0,1,2,5,6)[as.factor(attributes_sample$location[sample_ID])],
col = c(1,2,3,4,5,6)[as.factor(attributes_sample$date_sample[sample_ID])],
xlab = "ASV richness", ylab = "multifunctional redundancy exponent (a)",
ylim = c(0.06, 0.16)
)
abline(asv_redundancy)
legend("topright", legend = levels(as.factor(attributes_sample$location)), pch = c(0,1,2,5,6))
legend("bottomleft", legend = levels(as.factor(attributes_sample$date_sample[sample_ID])), col = c(1,2,3,4,5,6), pch = c(1,1,1,1,1,1))
#text(random_result$ASV_richness, random_result$slope,labels = attributes_sample$location[sample_ID], cex = 0.6, pos = 4, adj = 1)
pdf("figure02b.pdf", width = 10, height = 7, onefile = FALSE)
plot(
slope_10_c ~ ASV_richness, data = random_analytical,
pch = c(0,1,2,5,6)[as.factor(attributes_sample$location[sample_ID])],
col = c(1,2,3,4,5,6)[as.factor(attributes_sample$date_sample[sample_ID])],
xlab = "ASV richness", ylab = "multifunctional redundancy exponent (a)",
ylim = c(0.06, 0.16)
)
abline(asv_redundancy)
legend("topright", legend = levels(as.factor(attributes_sample$location)), pch = c(0,1,2,5,6))
legend("bottomleft", legend = levels(as.factor(attributes_sample$date_sample[sample_ID])), col = c(1,2,3,4,5,6), pch = c(1,1,1,1,1,1))
# Close the PDF device
dev.off()
null device
1
asv_redundancy2 <- lm(slope_10_c ~ ASV_richness, data = random_analytical[-9, ])
summary(asv_redundancy2)
Call:
lm(formula = slope_10_c ~ ASV_richness, data = random_analytical[-9,
])
Residuals:
Min 1Q Median 3Q Max
-0.012297 -0.006936 -0.000970 0.006783 0.014510
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.566e-01 7.761e-03 20.183 1.96e-09 ***
ASV_richness -3.142e-05 3.964e-06 -7.927 1.28e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.009097 on 10 degrees of freedom
Multiple R-squared: 0.8627, Adjusted R-squared: 0.849
F-statistic: 62.83 on 1 and 10 DF, p-value: 1.276e-05
The slope was much shallower than those from Miki et al. 2014. This is probably because the unit of extinction is the ASV rather than the OTU or species. When the unit of extinction is the ASV, functional redundancy should be much greater than that between OTUs or species. In Miki et al. 2014, most simulations used pseudo-communities in which only a single species from each genus was included.
Keywords: functional redundancy and functional vulnerability
survival_rate <- readRDS("./Jan2024v3/survival_rate/survial_rate_del0ASV_exclu_undeter.eukar_AfterNeg_Jan2024.obj")
#adding random case
survival_rate_mod <- data.frame(random = rep(1, length(survival_rate[, 1])),
F1 = survival_rate$KO_no_pa,
F2 = survival_rate$KO_no_pa_inverse,
F3 = survival_rate$dist_KO_pa_mean,
F4 = survival_rate$dist_KO_pa_mean_inverse,
A1 = survival_rate$abundance_mean,
A2 = survival_rate$abundance_inverse_sd,
A3 = survival_rate$abundance_rich,
A4 = survival_rate$abundance_sha
)
At each level of ASV richness, calculate the averages across repetitions first, then draw a single line of the averages
#for sample_1
presence <- which(stand_ASV_com_binarized$S01 > 0)
#ASV with greater richness of KOs (longer genomes) has a higher survival probability
prob_test <- prop.table(survival_rate_mod$F1[presence])
ASV_richness <- length(presence)
no_repetition <- 10
ASVs <- numeric(21)
KOs <- numeric(21)
KOs_temp <- numeric(no_repetition)
survived_ASVs <- list()
ASVs_full <- ASV_richness
KOs_full <- richness_MF$MF_G
survived_ASVs <- list()
s <- ASV_richness/20
slope <- numeric(no_repetition)
#Preparing survival sequences
for(j in 1:no_repetition) {
#fully shuffling for the order of survival (the inverse order of extinction)
survived_ASVs[[j]] <- sample(1:ASV_richness, size = ASV_richness, replace = FALSE, prob = prob_test)
#survived_ASVs[[j]] <- sample(1:length(presence), size = length(presence), replace = FALSE)
}
#For plotting each survival sequence
for(j in 1:no_repetition) {
#The case with a single ASV
ASVs[1] <- 1
KO_list_survived <- KO_com_binarized[survived_ASVs[[j]][1], ]
KOs[1] <- sum(KO_list_survived > 0)
for(k in 2:20) {
survived_richness <- round(s*(k - 1))
ASVs[k] <- survived_richness
KO_survived <- KO_com_binarized[presence[survived_ASVs[[j]][1:survived_richness]], ]
KO_list_survived <- apply(KO_survived, 2, sum)
KOs[k] <- sum(KO_list_survived > 0)
}
#The case with all ASVs
k <- 21
ASVs[k] <- ASVs_full
KOs[k] <- KOs_full
fitted_power <- lm(formula = log10(KOs) ~ log10(ASVs))
slope[j] <- fitted_power$coefficients[2]
plot(log10(KOs) ~ log10(ASVs) , type = "l", xlim = c(log10(1), log10(ASVs[21])), ylim = c(log10(100), log10(KOs[21])), col = 8)
par(new = T)
#print(j)
}
#For plotting with averaging KOs at each survival level
#The case with a single ASV
ASVs[1] <- 1
for(j in 1:no_repetition) {
KO_list_survived <- KO_com_binarized[survived_ASVs[[j]][1], ]
KOs_temp[j] <- sum(KO_list_survived > 0)
}
KOs[1] <- mean(KOs_temp)
for(k in 2:20) {
survived_richness <- round(s*(k - 1))
ASVs[k] <- survived_richness
for(j in 1:no_repetition) {
KO_survived <- KO_com_binarized[presence[survived_ASVs[[j]][1:survived_richness]], ]
KO_list_survived <- apply(KO_survived, 2, sum)
KOs_temp[j] <- sum(KO_list_survived > 0)
}
KOs[k] <- mean(KOs_temp)
}
k <- 21
ASVs[k] <- ASVs_full
KOs[k] <- KOs_full
fitted_power <- lm(formula = log10(KOs) ~ log10(ASVs))
slope <- fitted_power$coefficients[2]
plot(log10(KOs) ~ log10(ASVs) , type = "b", xlim = c(log10(1), log10(ASVs[21])), ylim = c(log10(100), log10(KOs[21])), cex = 2, col = 4, main = "Smaller KO richness goes extinct earlier")
#print(j)
confint.lm(fitted_power)
2.5 % 97.5 %
(Intercept) 3.0439433 3.2292720
log10(ASVs) 0.2121199 0.2903613
At each level of ASV richness, calculate the averages across repetitions first, then draw a single line of the averages
#for sample_01
#Simulate the nine extinction scenarios for sample S01 and overlay the
#repetition-averaged KO-vs-ASV richness curves in a single plot.
presence <- which(stand_ASV_com_binarized$S01 > 0)
#ASV with greater richness of KOs (longer genomes) has a higher survival probability
ASV_richness <- length(presence)
no_repetition <- 100
ASVs <- numeric(21)
KOs <- list()
KOs_temp <- numeric(no_repetition)
survived_ASVs <- list()
ASVs_full <- ASV_richness
#Observed KO richness of the full sample (first row of richness_MF)
KOs_full <- richness_MF$MF_G[1]
#NOTE(review): survived_ASVs is initialized twice; the second assignment is redundant
survived_ASVs <- list()
#Step size: 5% of the sample's ASV richness per extinction level
s <- ASV_richness/20
#Nine different scenarios, including the random-extinction scenario
for(i in 1:9) {
KOs[[i]] <- numeric(21)
#Survival weights of the present ASVs under scenario i, normalized to sum to 1
prob_test <- prop.table(survival_rate_mod[, i][presence])
#Preparing survival sequences
for(j in 1:no_repetition) {
#fully shuffling for the order of survival (the inverse order of extinction)
survived_ASVs[[j]] <- sample(1:ASV_richness, size = ASV_richness, replace = FALSE, prob = prob_test)
#survived_ASVs[[j]] <- sample(1:length(presence), size = length(presence), replace = FALSE)
}
#For plotting with averaging KOs at each survival level
#The case with a single ASV
ASVs[1] <- 1
for(j in 1:no_repetition) {
#NOTE(review): this index is not mapped through presence[], unlike the
#multi-ASV loop below — confirm presence[survived_ASVs[[j]][1]] was intended
KO_list_survived <- KO_com_binarized[survived_ASVs[[j]][1], ]
KOs_temp[j] <- sum(KO_list_survived > 0)
}
KOs[[i]][1] <- mean(KOs_temp)
#Levels 2-20: 5% steps of the full ASV richness
for(k in 2:20) {
survived_richness <- round(s*(k - 1))
ASVs[k] <- survived_richness
for(j in 1:no_repetition) {
KO_survived <- KO_com_binarized[presence[survived_ASVs[[j]][1:survived_richness]], ]
KO_list_survived <- apply(KO_survived, 2, sum)
KOs_temp[j] <- sum(KO_list_survived > 0)
}
KOs[[i]][k] <- mean(KOs_temp)
}
#Level 21: the full community
k <- 21
ASVs[k] <- ASVs_full
KOs[[i]][k] <- KOs_full
#One color per scenario (index i)
color_specific <- c("black", "darkgoldenrod1","coral1","hotpink3", "brown4", "lightslategray", "deepskyblue", "royalblue4", "seagreen3")
#First scenario opens the plot; later scenarios overlay via par(new = T)
#(identical xlim/ylim keep the coordinate systems aligned)
if(i == 1) {
plot(KOs[[i]][2:21] ~ ASVs[2:21] , type = "b", xlim = c(ASVs[2], ASVs[21]), ylim = c(3000, KOs[[i]][21]), cex = 2, col = color_specific[i], main = "All scenarios for Biwa0703", xlab = "ASV richness", ylab = "KO richness")
} else {
par(new = T)
plot(KOs[[i]][2:21] ~ ASVs[2:21] , type = "b", xlim = c(ASVs[2], ASVs[21]), ylim = c(3000, KOs[[i]][21]), cex = 2, col = color_specific[i], pch = i, main = "", xlab = "", ylab = "")
}
#NOTE(review): the legend is redrawn on every iteration; once after the loop suffices
labels <- colnames(survival_rate_mod)
legend("bottomright", legend = labels, col = color_specific, pch = 1:9)
}
#Replot the nine scenario curves for S01 on log10-log10 axes
for(i in 1:9){
if(i == 1) {
plot(log10(KOs[[i]][2:21]) ~ log10(ASVs[2:21]), type = "b", xlim = c(log10(ASVs[2]), log10(ASVs[21])), ylim = c(log10(3000), log10(KOs[[i]][21])), cex = 2, col = color_specific[i], main = "All scenarios for Biwa0703", xlab = "ASV richness", ylab = "KO richness")
} else {
#Overlay subsequent scenarios on the same coordinate system
par(new = T)
plot(log10(KOs[[i]][2:21]) ~ log10(ASVs[2:21]), type = "b", xlim = c(log10(ASVs[2]), log10(ASVs[21])), ylim = c(log10(3000), log10(KOs[[i]][21])), cex = 2, col = color_specific[i], pch = i, main = "", xlab = "", ylab = "")
}
#NOTE(review): the legend is redrawn on every iteration; once after the loop suffices
labels <- colnames(survival_rate_mod)
legend("bottomright", legend = labels, col = color_specific, pch = 1:9)
}
#Write the linear-axis overlay for S01 to PDF (Figure 3a)
pdf("figure03a.pdf", width = 10, height = 7, onefile = FALSE)
for(i in 1:9){
if(i == 1) {
plot(KOs[[i]][2:21] ~ ASVs[2:21] , type = "b", xlim = c(ASVs[2], ASVs[21]), ylim = c(3000, KOs[[i]][21]), cex = 2, col = color_specific[i], main = "All scenarios for Biwa0703", xlab = "ASV richness", ylab = "KO richness")
} else {
par(new = T)
plot(KOs[[i]][2:21] ~ ASVs[2:21] , type = "b", xlim = c(ASVs[2], ASVs[21]), ylim = c(3000, KOs[[i]][21]), cex = 2, col = color_specific[i], pch = i, main = "", xlab = "", ylab = "")
}
}
labels <- colnames(survival_rate_mod)
legend("bottomright", legend = labels, col = color_specific, pch = 1:9)
#Close the PDF device ("png 2" below is the console echo of dev.off())
dev.off()
png
2
At each level of ASV richness, calculate the averages across repetitions first, then draw a single line through those averages
#for sample_05
#Same nine-scenario simulation as for S01, applied to sample S05.
presence <- which(stand_ASV_com_binarized$S05 > 0)
#ASV with greater richness of KOs (longer genomes) has a higher survival probability
ASV_richness <- length(presence)
no_repetition <- 100
ASVs <- numeric(21)
KOs <- list()
KOs_temp <- numeric(no_repetition)
survived_ASVs <- list()
ASVs_full <- ASV_richness
#Observed KO richness of the full sample (fifth row of richness_MF)
KOs_full <- richness_MF$MF_G[5]
#NOTE(review): survived_ASVs is initialized twice; the second assignment is redundant
survived_ASVs <- list()
#Step size: 5% of the sample's ASV richness per extinction level
s <- ASV_richness/20
#Nine different scenarios, including the random-extinction scenario
for(i in 1:9) {
KOs[[i]] <- numeric(21)
#Survival weights of the present ASVs under scenario i, normalized to sum to 1
prob_test <- prop.table(survival_rate_mod[, i][presence])
#Preparing survival sequences
for(j in 1:no_repetition) {
#fully shuffling for the order of survival (the inverse order of extinction)
survived_ASVs[[j]] <- sample(1:ASV_richness, size = ASV_richness, replace = FALSE, prob = prob_test)
#survived_ASVs[[j]] <- sample(1:length(presence), size = length(presence), replace = FALSE)
}
#For plotting with averaging KOs at each survival level
#The case with a single ASV
ASVs[1] <- 1
for(j in 1:no_repetition) {
#NOTE(review): index not mapped through presence[], unlike the loop below —
#confirm presence[survived_ASVs[[j]][1]] was intended
KO_list_survived <- KO_com_binarized[survived_ASVs[[j]][1], ]
KOs_temp[j] <- sum(KO_list_survived > 0)
}
KOs[[i]][1] <- mean(KOs_temp)
#Levels 2-20: 5% steps of the full ASV richness
for(k in 2:20) {
survived_richness <- round(s*(k - 1))
ASVs[k] <- survived_richness
for(j in 1:no_repetition) {
KO_survived <- KO_com_binarized[presence[survived_ASVs[[j]][1:survived_richness]], ]
KO_list_survived <- apply(KO_survived, 2, sum)
KOs_temp[j] <- sum(KO_list_survived > 0)
}
KOs[[i]][k] <- mean(KOs_temp)
}
#Level 21: the full community
k <- 21
ASVs[k] <- ASVs_full
KOs[[i]][k] <- KOs_full
#color_specific is reused from the S01 section above
if(i == 1) {
plot(KOs[[i]][2:21] ~ ASVs[2:21] , type = "b", xlim = c(ASVs[2], ASVs[21]), ylim = c(3000, KOs[[i]][21]), cex = 2, col = color_specific[i], main = "All scenarios for Yasu0709", xlab = "ASV richness", ylab = "KO richness")
} else {
par(new = T)
plot(KOs[[i]][2:21] ~ ASVs[2:21] , type = "b", xlim = c(ASVs[2], ASVs[21]), ylim = c(3000, KOs[[i]][21]), cex = 2, col = color_specific[i], pch = i, main = "", xlab = "", ylab = "")
}
#NOTE(review): the legend is redrawn on every iteration; once after the loop suffices
labels <- colnames(survival_rate_mod)
legend("bottomright", legend = labels, col = color_specific, pch = 1:9)
}
#Replot the nine scenario curves for S05 on log10-log10 axes
for(i in 1:9){
if(i == 1) {
plot(log10(KOs[[i]][2:21]) ~ log10(ASVs[2:21]), type = "b", xlim = c(log10(ASVs[2]), log10(ASVs[21])), ylim = c(log10(3000), log10(KOs[[i]][21])), cex = 2, col = color_specific[i], main = "All scenarios for Yasu0709", xlab = "ASV richness", ylab = "KO richness")
} else {
#Overlay subsequent scenarios on the same coordinate system
par(new = T)
plot(log10(KOs[[i]][2:21]) ~ log10(ASVs[2:21]), type = "b", xlim = c(log10(ASVs[2]), log10(ASVs[21])), ylim = c(log10(3000), log10(KOs[[i]][21])), cex = 2, col = color_specific[i], pch = i, main = "", xlab = "", ylab = "")
}
#NOTE(review): the legend is redrawn on every iteration; once after the loop suffices
labels <- colnames(survival_rate_mod)
legend("bottomright", legend = labels, col = color_specific, pch = 1:9)
}
#Write the linear-axis overlay for S05 to PDF (Figure 3b)
pdf("figure03b.pdf", width = 10, height = 7, onefile = FALSE)
for(i in 1:9){
if(i == 1) {
plot(KOs[[i]][2:21] ~ ASVs[2:21] , type = "b", xlim = c(ASVs[2], ASVs[21]), ylim = c(3000, KOs[[i]][21]), cex = 2, col = color_specific[i], main = "All scenarios for Yasu0709", xlab = "ASV richness", ylab = "KO richness")
} else {
par(new = T)
plot(KOs[[i]][2:21] ~ ASVs[2:21] , type = "b", xlim = c(ASVs[2], ASVs[21]), ylim = c(3000, KOs[[i]][21]), cex = 2, col = color_specific[i], pch = i, main = "", xlab = "", ylab = "")
}
}
labels <- colnames(survival_rate_mod)
legend("bottomright", legend = labels, col = color_specific, pch = 1:9)
#Close the PDF device ("png 2" below is the console echo of dev.off())
dev.off()
png
2
# Indices of the samples included in the extinction analysis
# (only a subset of all sequenced samples is used).
sample_ID <- c(1:8, 11, 17:20)
### Parameters used by sim_extinction() ###
# com_binarized: binarized ASV composition data frame
# KO_binarized: binarized KO composition data frame
# surv_prob: extinction sequences (with replications) as a list
#Simulate stepwise ASV extinction for every sample and fit the power-law
#relationship between ASV richness and KO richness on log-log axes.
#
#Args:
#  com_binarized: binarized ASV composition (rows = ASVs, columns = samples)
#  KO_binarized:  binarized KO composition (rows = ASVs, columns = KOs)
#  surv_prob:     full-length per-ASV survival weights; NULL = uniform random
#  repetition:    number of random extinction sequences per sample
#  title:         scenario label appended to each per-sample plot title
#  graphic:       draw diagnostic plots when TRUE
#
#Returns a data.frame with one row per sample: full ASV/KO richness, fitted
#slope, its 95% confidence bounds, and adjusted R^2.
#NOTE(review): relies on the globals sample_ID and sample_info defined earlier;
#assumes ncol(com_binarized) == length(sample_ID) — confirm with callers.
sim_extinction <- function(com_binarized, KO_binarized, surv_prob = NULL, repetition, title = "random", graphic = TRUE) {
n_samples <- length(sample_ID)
ASVs_full <- numeric(n_samples)
KOs_full <- numeric(n_samples)
slope <- numeric(n_samples)
slope_up <- numeric(n_samples)
slope_down <- numeric(n_samples)
r2_adjusted <- numeric(n_samples)
for(sample_no in seq_len(length(com_binarized[1, ]))){
#Rows (ASVs) present in this sample
presence <- which(com_binarized[, sample_no] > 0)
ASV_richness <- length(presence)
ASVs <- numeric(21)
KOs <- numeric(21)
KOs_temp <- numeric(repetition)
survived_ASVs <- vector("list", repetition)
ASVs_full[sample_no] <- ASV_richness
#KO richness of the full sample: a KO is present if any present ASV carries it
KOs_full[sample_no] <- sum(t(KO_binarized) %*% com_binarized[, sample_no] > 0)
#Step size: 5% of the sample's ASV richness per extinction level
s <- ASV_richness/20
#Refresh the graphics for each sample_no
par(new = FALSE)
#Survival weights restricted to the ASVs present in this sample
if(is.null(surv_prob)) {
survival_prob <- NULL
} else {
survival_prob <- surv_prob[presence]
}
#Preparing survival sequences (positions within `presence`)
for(j in seq_len(repetition)) {
#fully shuffling for the order of appearance (the inverse order of extinction)
survived_ASVs[[j]] <- sample(seq_len(ASV_richness), size = ASV_richness, replace = FALSE, prob = survival_prob)
}
#Plot every individual extinction sequence (grey background lines)
for(j in seq_len(repetition)) {
for(k in 3:20) {
survived_richness <- round(s*(k - 1))
ASVs[k] <- survived_richness
KO_survived <- KO_binarized[presence[survived_ASVs[[j]][1:survived_richness]], ]
KO_list_survived <- apply(KO_survived, 2, sum)
KOs[k] <- sum(KO_list_survived > 0)
}
#The case with all ASVs
k <- 21
ASVs[k] <- ASVs_full[sample_no]
KOs[k] <- KOs_full[sample_no]
if(graphic == TRUE) {
plot(log10(KOs[3:21]) ~ log10(ASVs[3:21]) , type = "l", xlim = c(log10(ASVs[3]), log10(ASVs[21])), ylim = c(log10(3000), log10(KOs[21])), col = 8)
par(new = TRUE)
}
}#end of for j
#For plotting with averaging KOs at each survival level
#The case with a single ASV
#BUG FIX: the shuffled value is a position within `presence`, so it must be
#mapped back to an ASV row via presence[] (previously indexed KO_binarized
#directly, picking an unrelated row)
for(j in seq_len(repetition)) {
KO_list_survived <- KO_binarized[presence[survived_ASVs[[j]][1]], ]
KOs_temp[j] <- sum(KO_list_survived > 0)
}
KOs[1] <- mean(KOs_temp)
#Levels 3-20: average KO richness over repetitions
for(k in 3:20) {
survived_richness <- round(s*(k - 1))
ASVs[k] <- survived_richness
for(j in seq_len(repetition)) {
KO_survived <- KO_binarized[presence[survived_ASVs[[j]][1:survived_richness]], ]
KO_list_survived <- apply(KO_survived, 2, sum)
KOs_temp[j] <- sum(KO_list_survived > 0)
}
KOs[k] <- mean(KOs_temp)
}
k <- 21
ASVs[k] <- ASVs_full[sample_no]
KOs[k] <- KOs_full[sample_no]
#0-90% reduction with the coarse interval (levels 3..21 only)
fitted_power <- lm(formula = log10(KOs[3:21]) ~ log10(ASVs[3:21]))
slope[sample_no] <- fitted_power$coefficients[2]
title_text <- paste(sample_info$Description[sample_ID[sample_no]], title, sep = ":")
if(graphic == TRUE) {
plot(log10(KOs[3:21]) ~ log10(ASVs[3:21]) , type = "b", xlim = c(log10(ASVs[3]), log10(ASVs[21])), ylim = c(log10(3000), log10(KOs[21])), cex = 2, col = 4, main = title_text)
abline(fitted_power, col = 5)
}
#Compute the confidence interval once instead of twice
ci <- confint.lm(fitted_power)
slope_down[sample_no] <- ci[2, 1]
slope_up[sample_no] <- ci[2, 2]
r2_adjusted[sample_no] <- summary(fitted_power)$adj.r.squared
}#end of for sample_no
summary_extinction <- data.frame(sample = sample_info$Description[sample_ID], ASVs_full, KOs_full, slope, slope_down, slope_up, r2_adjusted)
return(summary_extinction)
}
#set.seed(1234567)
#Run the nine extinction scenarios in parallel, one core per scenario.
#NOTE(review): mclapply() forks; with mc.cores > 1 it is not supported on Windows
nonrandom_sim <- list()
labels <- colnames(survival_rate_mod)
nonrandom_sim <- mclapply(1:9, function(i) {
#Each worker simulates one scenario (100 repetitions, no plotting)
result_sim <- sim_extinction(stand_ASV_com_binarized, KO_com_binarized, surv_prob = survival_rate_mod[, i], repetition = 100, title = labels[i], graphic = FALSE)
result_sim
}, mc.cores = 9)
Results
nonrandom_sim
[[1]]
[[2]]
[[3]]
[[4]]
[[5]]
[[6]]
[[7]]
[[8]]
[[9]]
NA
Preparing a dataframe for graphic presentation
#Collect the fitted slope vector from each of the nine scenario results,
#then flatten them into a single numeric vector (9 scenarios x 13 samples).
#(Replaces growing a vector with append() inside a loop.)
slope_r <- lapply(nonrandom_sim, function(res) res$slope)
slope_summary <- unlist(slope_r, use.names = FALSE)
#Echo the length as a sanity check (expected: 9 * 13 = 117)
length(slope_summary)
[1] 117
#Assemble the long-format data frame used by the Figure 4 plots:
#one row per (scenario, sample) combination.
sample_id <- rep(nonrandom_sim[[1]]$sample, 9)
sites <- rep(c("biwa", "ane", "echi", "hino", "yasu", "biwa", "ane", "echi", "biwa", "ane", "echi", "hino", "yasu"), 9)
dates <- rep(c("20190703", "20190709", "20190709", "20190709", "20190709", "20190730", "20190807", "20190807", "20190910", "20191015", "20191015", "20191015", "20191015"), 9)
#Scenario labels/numbers: 13 samples per scenario, in fixed order
xtype <- rep(c("random", "F1", "F2", "F3", "F4", "A1", "A2", "A3", "A4"), each = 13)
xtype_n <- rep(1:9, each = 13)
slope_2 <- slope_summary - rep(slope_summary[1:13], 9) #normalized by random scenario through subtraction
slope_3 <- slope_summary/rep(slope_summary[1:13], 9) #normalized by random scenario through division
#BUG FIX: the last column used `slope_nd <- slope_3`, which assigned a global
#variable and produced a mangled auto-generated column name; `=` names the
#column slope_nd so ggplot's aes(y = slope_nd) resolves it from the data
slope_summary2 <- data.frame(dates, sites, xtype, xtype_n, slope = slope_summary, slope_ns = slope_2, slope_nd = slope_3)
#Ordered factor so the boxplot x-axis follows the scenario order, not alphabetical
slope_summary2$xtype2 <- factor(slope_summary2$xtype, levels=c("random", "F1", "F2", "F3", "F4", "A1", "A2", "A3", "A4"))
Figure 4
#Scenario labels for the x axis, in plotting order
custom_labels <- c("random", "F1", "F2", "F3", "F4", "A1", "A2", "A3", "A4")
#NOTE(review): sample_id is a global vector, not a column of slope_summary2;
#ggplot falls back to the enclosing environment to resolve it — fragile
fig4_no1 <- ggplot(data = slope_summary2) +
geom_line(aes(x = xtype_n, y = slope, group = sample_id, color = dates)) +
geom_point(aes(x = xtype_n, y = slope, color = dates, shape = sites), size = 3) +
scale_x_continuous(breaks = c(1:9), labels = custom_labels) + scale_shape_manual(values = c(15, 16, 17, 5, 6)) + theme_bw() +
labs (title = "Direct comparison of exponents", x = "extinction scenarios", y = "redundancy exponent (a)")
#Exponents normalized by subtracting the random scenario
#NOTE(review): "substracting" in the title is a typo in a runtime string;
#left unchanged here to preserve output
fig4_no2 <- ggplot(data = slope_summary2) +
geom_line(aes(x = xtype_n, y = slope_ns, group = sample_id, color = dates)) +
geom_point(aes(x = xtype_n, y = slope_ns, color = dates, shape = sites), size = 3) +
scale_x_continuous(breaks = c(1:9), labels = custom_labels) + scale_shape_manual(values = c(15, 16, 17, 5, 6)) + theme_bw() +
labs (title = "Comparison through substracting random scenario of exponents", x = "extinction scenarios", y = "redundancy exponent (a)")
#Exponents normalized by dividing by the random scenario
#NOTE(review): slope_nd resolves via the global created at its definition site
#(see the data.frame construction above); "divising" is a typo in the title
fig4_no3 <- ggplot(data = slope_summary2) +
geom_line(aes(x = xtype_n, y = slope_nd, group = sample_id, color = dates)) +
geom_point(aes(x = xtype_n, y = slope_nd, color = dates, shape = sites), size = 3) +
scale_x_continuous(breaks = c(1:9), labels = custom_labels) + scale_shape_manual(values = c(15, 16, 17, 5, 6)) + theme_bw() +
labs (title = "Comparison through divising by random scenario of exponents", x = "extinction scenarios", y = "redundancy exponent (a)")
#Boxplot version; outliers hidden because every point is drawn by geom_jitter
fig4_no4 <- ggplot(slope_summary2, aes(xtype2, slope_ns))+
geom_boxplot(outlier.shape = NA)+
geom_jitter(aes(shape = sites, color = dates),width = 0.25, size = 3)+
scale_shape_manual(values = c(15, 16, 17, 5, 6))+
theme_bw() +
labs (title = "Comparison through substracting random scenario of exponents", x = "Extinction scenarios", y = "Redundancy exponent (a)")
plot(fig4_no1)
plot(fig4_no2)
plot(fig4_no3)
plot(fig4_no4)
#Write Figure 4a to PDF
pdf("figure04a.pdf", width = 10, height = 7, onefile = FALSE)
plot(fig4_no1)
# Close the PDF device (the comment previously said PostScript; this is pdf())
dev.off()
png
2
#Write Figure 4b (the boxplot version) to PDF
pdf("figure04b.pdf", width = 10, height = 7, onefile = FALSE)
plot(fig4_no4)
# Close the PDF device (the comment previously said PostScript; this is pdf())
dev.off()
png
2
At each level of ASV richness for each extinction scenario, prepare 200 replications
#Meta-community extinction simulation: shuffle the full ASV pool into random
#survival orders and, at 21 nested survival levels, record the mean and the
#2.5/97.5 percentiles of KO richness across `no_repetition` replications.
#
#Args:
#  com_binarized: binarized ASV composition (rows = ASVs); only its row count is used
#  KO_binarized:  binarized KO composition (rows = ASVs, columns = KOs)
#  surv_prob:     per-ASV survival weights; NULL = uniform random extinction
#  no_repetition: number of shuffled survival sequences
#  scenario:      label stored in the output column `s`
#
#Returns a 21-row data.frame: scenario label, ASV richness at each level,
#mean KO richness and its 95% interval bounds.
sim_extinctionM <- function(com_binarized, KO_binarized, surv_prob = NULL, no_repetition, scenario = "S1") {
#Total number of ASVs in the meta-community
ASV_richnessM <- length(com_binarized[, 1])
#KO richness of the intact meta-community
KO_richnessM <- sum(apply(KO_binarized, 2, sum) > 0)
#Normalize the survival weights to probabilities.
#ROBUSTNESS FIX: the default surv_prob = NULL previously went straight into
#prop.table() and broke sample(); NULL now means uniform probabilities
if(is.null(surv_prob)) {
prob_testM <- NULL
} else {
prob_testM <- prop.table(surv_prob)
}
ASVsM <- numeric(21)
KOs_tempM <- numeric(no_repetition)
KOs_975 <- numeric(21)
KOs_025 <- numeric(21)
KOs_mean <- numeric(21)
survived_ASVsM <- vector("list", no_repetition)
#Step size: 5% of the meta-community richness per level
s <- ASV_richnessM/20
#Preparing survival sequences
for(j in seq_len(no_repetition)) {
#fully shuffling for the order of survival (the inverse order of extinction)
survived_ASVsM[[j]] <- sample(seq_len(ASV_richnessM), size = ASV_richnessM, replace = FALSE, prob = prob_testM)
}
#Survival levels: level 1 is fixed at 100 ASVs (assumes the meta-community
#holds at least 100 ASVs — TODO confirm), levels 2-20 are 5% steps
ASVsM[1] <- 100
for(k in 2:20) {
ASVsM[k] <- round(s*(k - 1))
}
#One pass per level; the duplicated level-1 block of the original is folded in
for(k in 1:20) {
for(j in seq_len(no_repetition)) {
#BUG FIX: use the KO_binarized argument; the original referenced the global
#KO_com_binarized, silently ignoring the parameter
KO_survived <- KO_binarized[survived_ASVsM[[j]][1:ASVsM[k]], ]
KO_list_survived <- apply(KO_survived, 2, sum)
KOs_tempM[j] <- sum(KO_list_survived > 0)
}
KOs_mean[k] <- mean(KOs_tempM) #mean of the replications
KOs_975[k] <- quantile(KOs_tempM, probs = 0.975) #Upper boundary of 95% confidence interval
KOs_025[k] <- quantile(KOs_tempM, probs = 0.025) #Lower boundary of 95% confidence interval
}
#The case with all ASVs: no resampling needed, the interval collapses
ASVsM[21] <- ASV_richnessM
KOs_mean[21] <- KO_richnessM
KOs_975[21] <- KO_richnessM
KOs_025[21] <- KO_richnessM
simM <- data.frame(s = rep(scenario, 21), ASV_richness = ASVsM, KOs_mean, KOs_975, KOs_025)
return(simM)
}
#Smoke test of sim_extinctionM with 10 repetitions.
#NOTE(review): scenario is labeled "random" but surv_prob = survival_rate_mod$F1
#(the F1 weights) — confirm whether NULL/uniform probabilities were intended
test_randomM <- sim_extinctionM(stand_ASV_com_binarized, KO_com_binarized, surv_prob = survival_rate_mod$F1, no_repetition = 10, scenario = "random")
#For KO richness plot
meta_plot <- ggplot(data = test_randomM) + geom_line(aes(x = ASV_richness, y = KOs_mean))
#95% interval ribbon computed from the replications
meta_plot <- meta_plot + geom_ribbon(aes(x = ASV_richness, ymin = KOs_025, ymax = KOs_975), fill = "red", alpha = 0.5)
plot(meta_plot)
#Overlay the observed per-sample richness points.
#NOTE(review): richness_MF$location inside aes() bypasses the data argument;
#the conventional form is the bare column name `location`
meta_plot <- meta_plot + geom_point(data = richness_MF, aes(x = ASV_richness, y = MF_G, shape = richness_MF$location), size = 3, alpha = 0.9) + labs(shape = "sites") + scale_shape_manual(values = c(0, 1, 2, 5, 6))
plot(meta_plot)
#Full run: 200 replications per scenario, nine scenarios in parallel
#NOTE(review): mclapply forks; mc.cores > 1 is not supported on Windows
meta_sim <- mclapply(1:9, function(i) {
result_sim <- sim_extinctionM(stand_ASV_com_binarized, KO_com_binarized, surv_prob = survival_rate_mod[, i], no_repetition = 200, scenario = colnames(survival_rate_mod)[i])
result_sim
}, mc.cores = 9)
#Echo the list of per-scenario data frames to the console
meta_sim
[[1]]
[[2]]
[[3]]
[[4]]
[[5]]
[[6]]
[[7]]
[[8]]
[[9]]
NA
#One plot per scenario: its 95% ribbon plus the random-scenario bounds (dashed)
meta_plot_j <- list()
for(j in 1:9) {
meta_plot <- ggplot(data = meta_sim[[j]]) + geom_line(aes(x = ASV_richness, y = KOs_mean))
#fill = j picks a palette color by integer code
meta_plot <- meta_plot + geom_ribbon(aes(x = ASV_richness, ymin = KOs_025, ymax = KOs_975), fill = j, alpha = 0.5)
#random range
meta_plot <- meta_plot + geom_line(data = meta_sim[[1]], aes(x = ASV_richness, y = KOs_025), linetype = "dashed")
meta_plot <- meta_plot + geom_line(data = meta_sim[[1]], aes(x = ASV_richness, y = KOs_975), linetype = "dashed")
#NOTE(review): richness_MF$location inside aes() bypasses the data argument;
#the conventional form is the bare column name `location`
meta_plot <- meta_plot + geom_point(data = richness_MF, aes(x = ASV_richness, y = MF_G, shape = richness_MF$location), size = 3, alpha = 0.9) + labs(shape = "sites") + scale_shape_manual(values = c(0, 1, 2, 5, 6))
meta_plot_j[[j]] <- meta_plot
}
#Render all nine scenario plots
for(j in 1:9){
plot(meta_plot_j[[j]])
}
Generating the PDF files (Figure 5 from scenarios 2-5, Figure S2 from scenarios 6-9)
#Scenarios 2-5 (F1-F4) become Figure 5 panels 1-4
for(j in 2:5){
file_name <- paste("figure05_", j-1, ".pdf", sep = "")
pdf(file_name, width = 10, height = 7, onefile = FALSE)
plot(meta_plot_j[[j]])
# Close the PDF device (the comment previously said PostScript; this is pdf())
dev.off()
}
#Scenarios 6-9 (A1-A4) become Figure S2 panels 1-4
for(j in 6:9){
file_name <- paste("figureS2_", j-5, ".pdf", sep = "")
pdf(file_name, width = 10, height = 7, onefile = FALSE)
plot(meta_plot_j[[j]])
# Close the PDF device (the comment previously said PostScript; this is pdf())
dev.off()
}
#Stack the nine per-scenario data frames (21 rows each) into one long data
#frame. do.call() binds them in a single pass instead of growing meta_sim_all
#with rbind inside a loop.
meta_sim_all <- do.call(rbind.data.frame, meta_sim)
#ribbon plot
#NOTE(review): only scenarios 1-5 (rows 1-105 of the stacked frame) are drawn;
#confirm the A1-A4 scenarios are intentionally omitted here
meta_plot_all <- ggplot(data = meta_sim_all) #+ geom_line(aes(x = ASV_richness, y = KOs_mean, group = s, color = s))
for(j in 1:5) {
#Row window of scenario j in the stacked data frame (21 rows per scenario)
start <- 1 + 21*(j - 1)
end <- 21*j
meta_plot_all <- meta_plot_all + geom_ribbon(data = meta_sim_all[start:end, ], aes(x = ASV_richness, ymin = KOs_025, ymax = KOs_975), fill = j, alpha = 0.5)
}
#NOTE(review): richness_MF$location inside aes() bypasses the data argument
meta_plot_all <- meta_plot_all + geom_point(data = richness_MF, aes(x = ASV_richness, y = MF_G, shape = richness_MF$location), size = 3, alpha = 0.4) + labs(shape = "sites") + scale_shape_manual(values = c(0, 1, 2, 5, 6))
plot(meta_plot_all)
#line plot
#Same content as the ribbon plot above, drawn as boundary lines instead
meta_plot_all <- ggplot(data = meta_sim_all) #+ geom_line(aes(x = ASV_richness, y = KOs_mean, group = s, color = s))
for(j in 1:5) {
#Row window of scenario j in the stacked data frame (21 rows per scenario)
start <- 1 + 21*(j - 1)
end <- 21*j
meta_plot_all <- meta_plot_all + geom_line(data = meta_sim_all[start:end, ], aes(x = ASV_richness, y = KOs_025), color = j)
meta_plot_all <- meta_plot_all + geom_line(data = meta_sim_all[start:end, ], aes(x = ASV_richness, y = KOs_975), color = j)
}
#NOTE(review): richness_MF$location inside aes() bypasses the data argument
meta_plot_all <- meta_plot_all + geom_point(data = richness_MF, aes(x = ASV_richness, y = MF_G, shape = richness_MF$location), size = 3, alpha = 0.4) + labs(shape = "sites") + scale_shape_manual(values = c(0, 1, 2, 5, 6))
plot(meta_plot_all)
#ASV richness vs bacterial cell density for the analysed samples;
#point shape encodes the site, color encodes the sampling date
plot(
data_summary$stand_ASV_richness[sample_ID] ~ data_summary$bacterial.abundance[sample_ID],
pch = c(0,1,2,5,6)[as.factor(data_summary$location[sample_ID])],
col = c(1,2,3,4,5,6)[as.factor(attributes_sample$date_sample[sample_ID])],
xlab = "Bacterial Abundance (cells/mL)", ylab = "ASV richness"
)
#Site legend; shapes match the pch mapping above
legend("topright", legend = levels(as.factor(data_summary$location)), pch = c(0,1,2,5,6))
#legend("bottomleft", legend = levels(as.factor(data_summary$date_sample[sample_ID])), col = c(1,2,3,4,5,6), pch = c(1,1,1,1,1,1))
Cite: Why are some microbes more ubiquitous than others? Predicting the habitat breadth of soil bacteria, Ecology Letters, (2014) 17: 794–802 doi: 10.1111/ele.12282
#Relationship between two survival-weight scenarios.
#NOTE(review): axis labels describe F2/F4 as inverses — confirm they match the
#definitions of these columns elsewhere in the analysis
plot(survival_rate_mod$F4 ~ survival_rate_mod$F2,
xlab = "Inverse of KO richness",
ylab = "Inverse of the mean dissimilarity"
)
#Mean abundance (log10) against KO richness per ASV
plot(log10(survival_rate_mod$A1) ~ survival_rate_mod$F1,
xlab = "KO richness",
ylab = "log10(mean abundance)"
)
#Linear regression of log10 mean abundance (A1) on KO richness (F1);
#the summary is echoed to the console below
f1_a1_model <- lm(log10(A1) ~ F1, data = survival_rate_mod)
summary(f1_a1_model)
Call:
lm(formula = log10(A1) ~ F1, data = survival_rate_mod)
Residuals:
Min 1Q Median 3Q Max
-2.57323 -0.38115 0.02451 0.41292 2.93730
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.171e+00 4.105e-02 52.9 <2e-16 ***
F1 4.611e-04 3.468e-05 13.3 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.6916 on 3574 degrees of freedom
Multiple R-squared: 0.04715, Adjusted R-squared: 0.04688
F-statistic: 176.9 on 1 and 3574 DF, p-value: < 2.2e-16
#Overlay the fitted log10(A1) ~ F1 line on the preceding scatter plot
abline(f1_a1_model)
#Occurrence count against KO richness per ASV
plot(survival_rate_mod$A3 ~ survival_rate_mod$F1,
xlab = "KO richness",
ylab = "No. of occurrence")
#Linear regression of occurrence (A3) on KO richness (F1); summary echoed below
f1_a3_model <- lm(A3 ~ F1, data = survival_rate_mod)
summary(f1_a3_model)
Call:
lm(formula = A3 ~ F1, data = survival_rate_mod)
Residuals:
Min 1Q Median 3Q Max
-8.6984 -2.0013 -0.0598 1.9386 7.5665
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.9165322 0.1580728 24.78 <2e-16 ***
F1 0.0020724 0.0001335 15.52 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.663 on 3574 degrees of freedom
Multiple R-squared: 0.06313, Adjusted R-squared: 0.06286
F-statistic: 240.8 on 1 and 3574 DF, p-value: < 2.2e-16
#Overlay the fitted A3 ~ F1 line on the preceding scatter plot
abline(f1_a3_model)
#Pairwise scatter-plot matrix of all scenario columns except the first
plot(survival_rate_mod[, -1])
Based on the # of KOs that are not shared with all other ASVs
This could be directly compared to Fig.3 in Ruhl et al. 2022